This file will serve to analyze text data acquired from participants during TSST and Non-TSST sessions based on sentiment.
Included lexicons/dictionaries are nrc by Saif Mohammad and Peter Turney, AFINN by Finn Årup Nielsen, bing by Bing Liu and collaborators, and loughran.
This section of the document walks through importing the data and cleaning it using various packages. Cleaning data includes removing special characters and stopwords, words that appear frequently in text data but provide little to no information alone, like “the”,“at”, and “is”.
# Load in data -----------------------------------------
# using https://www.kaggle.com/rtatman/tutorial-sentiment-analysis-in-r code
# also using for much of the code https://www.red-gate.com/simple-talk/sql/bi/text-mining-and-sentiment-analysis-with-r/
# NOTE(review): a hard-coded, machine-specific setwd() makes this script
# non-portable; consider an RStudio project or the here package so the path
# resolves for every user.
setwd('~/Box/Bakkour-Lab/users/abby/SPI-analysis')
# Each row of text_df.csv appears to be one participant response plus metadata;
# the filtering below implies it has `session` and `prompt` columns and a text
# column in position 4 -- confirm against the actual CSV header.
text <- read.csv("text_df.csv")
# ALTERNATIVELY, you can do the following code
# text <- read.csv(file.choose()) # choosing the file interactively from working directory. when running this, you should be prompted to choose your file directly like you would in Finder on a mac
# Create 4 dataframes for "stress and describe" scenarios, "stress and imagine" scenarios, "no stress and describe" scenarios, and "no stress and imagine" scenarios ---------------------------------
# NOTE: I created dataframes for each of the following, called "sdescribetextdf", "simaginetextdf", "nsdescribetextdf", and "nsimaginetextdf". HOWEVER, these are strictly for viewing purposes. For the continued text cleaning and analysis, we will use the values "sdescribetext", "simaginetext", "nsdescribetext", and "nsimaginetext", as these are characters and able to be manipulated by us
# stress session and describe prompt
stressdescribe <- text %>%
  filter(session == "stress" & prompt == "describe_writeout")
# Extract the response text (column 4) as a character vector. [[4]] states
# "take column 4" directly instead of deleting the other six columns by
# negative index; assumes the text lives in column 4 -- TODO confirm.
sdescribetext <- stressdescribe[[4]]
sdescribetextdf <- as.data.frame(sdescribetext) # JUST FOR VIEWING
class(sdescribetext) # character
## [1] "character"
# stress session and imagine prompt
stressimagine <- text %>%
  filter(session == "stress" & prompt == "imagine_writeout")
# Extract the response text (column 4) as a character vector; clearer than
# dropping the other six columns with negative indices. Assumes column 4 is
# the text column -- TODO confirm.
simaginetext <- stressimagine[[4]]
simaginetextdf <- as.data.frame(simaginetext) # JUST FOR VIEWING
class(simaginetext) # character
## [1] "character"
# no stress session and describe prompt
nostressdescribe <- text %>%
  filter(session == "nostress" & prompt == "describe_writeout")
# Extract the response text (column 4) as a character vector; clearer than
# dropping the other six columns with negative indices. Assumes column 4 is
# the text column -- TODO confirm.
nsdescribetext <- nostressdescribe[[4]]
nsdescribetextdf <- as.data.frame(nsdescribetext) # JUST FOR VIEWING
class(nsdescribetext) # character
## [1] "character"
# no stress session and imagine prompt
nostressimagine <- text %>%
  filter(session == "nostress" & prompt == "imagine_writeout")
# Extract the response text (column 4) as a character vector; clearer than
# dropping the other six columns with negative indices. Assumes column 4 is
# the text column -- TODO confirm.
nsimaginetext <- nostressimagine[[4]]
nsimaginetextdf <- as.data.frame(nsimaginetext) # JUST FOR VIEWING
class(nsimaginetext) # character
## [1] "character"
# Now our 4 different dataframes to analyze separately are titled "sdescribetextdf", "simaginetextdf", "nsdescribetextdf", and "nsimaginetextdf" -- HOWEVER, we should stick to using the non-dataframe values for our analyses, as these are in the format of "character", which allows us to manipulate them
# Tokenize data ------------
# Next, we need to tokenize our data. This makes every row be a single token to be analyzed.
# This function also importantly removes punctuation and makes all of the letters lowercase, which will be useful for further analysis
# data_frame() was deprecated in tibble 1.1.0 (the old run emitted a warning
# saying to use tibble() instead), so build each one-column table with tibble().
# stress session and describe prompt
sdtoken <- tibble(text = sdescribetext) %>%
  unnest_tokens(output = word, input = text, token = "words")
# stress session and imagine prompt
sitoken <- tibble(text = simaginetext) %>%
  unnest_tokens(output = word, input = text, token = "words")
# no stress session and describe prompt
nsdtoken <- tibble(text = nsdescribetext) %>%
  unnest_tokens(output = word, input = text, token = "words")
# no stress session and imagine prompt
nsitoken <- tibble(text = nsimaginetext) %>%
  unnest_tokens(output = word, input = text, token = "words")
# Remove stopwords ------------
# We also want to use the data set stop_words from tidytext to pull out the words that won't be sentimental
# To see what stopwords are, you can make use of the function data(stop_words), stop_words
# Passing by = "word" explicitly documents the join key and silences the
# 'Joining, by = "word"' message anti_join() prints when left to guess.
# stress session and describe prompt
sdtoken <- anti_join(sdtoken, stop_words, by = "word")
# stress session and imagine prompt
sitoken <- anti_join(sitoken, stop_words, by = "word")
# no stress session and describe prompt
nsdtoken <- anti_join(nsdtoken, stop_words, by = "word")
# no stress session and imagine prompt
nsitoken <- anti_join(nsitoken, stop_words, by = "word")
# If you run this section separately from the tokenizing section from before, you will see the number of observations drop dramatically as the stop_words are removed!
# Importantly, we don't seem to see a significant difference between the number of words used when participants were stressed (TSST session) compared to the non-TSST session.
# Now our data is sorted, the stopwords and punctuation are removed, and the words are all lowercased --- let's move onto observing features of the text!
Next, we should look a bit closer at these four datasets to see what we might want to look at more closely when it comes to visualization and, eventually, sentiment analysis. To do this, let’s look at the types of words used and how often they are used in comparison to others.
# Sort the words in the datasets by frequency ------------------------
# count() tallies one row per distinct word; arranging by descending n puts
# the most frequently used words at the top of each frequency table (same
# result as count(word, sort = TRUE)).
# stress session and describe prompt
sdtextfreq <- sdtoken %>%
  count(word) %>%
  arrange(desc(n))
sdtextfreq
## 1,273 distinct words; top: spend 91, person 47, buy 40, money 39, buying 32,
## food 23, dollars 20, store 20, drinks 18, car 16
# stress session and imagine prompt
sitextfreq <- sitoken %>%
  count(word) %>%
  arrange(desc(n))
sitextfreq
## 1,210 distinct words; top: buy 90, spend 70, money 41, store 25, food 23,
## friends 22, days 19, hair 14, friend 13, pair 13
# no stress session and describe prompt
nsdtextfreq <- nsdtoken %>%
  count(word) %>%
  arrange(desc(n))
nsdtextfreq
## 1,257 distinct words; top: spend 77, buy 64, money 54, food 35, person 26,
## buying 24, spent 24, store 21, drinks 20, "20" 18
# no stress session and imagine prompt
nsitextfreq <- nsitoken %>%
  count(word) %>%
  arrange(desc(n))
nsitextfreq
## 1,238 distinct words; top: buy 94, spend 70, money 61, food 41, friends 40,
## store 28, days 22, rest 21, walk 20, purchase 19
# Graph the top 10 words for each dataset on a bar plot ------------------------
# The four plots below differ only in data, colors, and title, so one helper
# builds all of them -- this keeps the styling consistent and avoids
# copy/paste drift.
#   freq  : a word-frequency table with columns `word` and `n`
#   title : plot title
#   color / fill : bar outline and fill colors
# Returns a ggplot object; calling the helper at top level prints (draws) it,
# just as the original inline pipelines did.
plot_top_words <- function(freq, title, color, fill) {
  freq %>%
    mutate(word = reorder(word, n)) %>% # order bars by frequency
    arrange(desc(n)) %>%
    slice(1:10) %>% # keep the ten most frequent words
    ggplot(aes(word, n)) +
    geom_col(color = color, fill = fill) + # geom_col() == geom_bar(stat = "identity")
    coord_flip() +
    ylab("Frequency of Word Occurrence") +
    xlab("Word Used") +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = 0.5))
}
# stress session and describe prompt
plot_top_words(sdtextfreq, "Most Commonly Used Words In Stress and Describe Scenario", "#9DB6BC", "#B2D1D8")
# stress session and imagine prompt
plot_top_words(sitextfreq, "Most Commonly Used Words In Stress and Imagine Scenario", "#8895B4", "#A3B3D8")
# no stress session and describe prompt
plot_top_words(nsdtextfreq, "Most Commonly Used Words In No Stress and Describe Scenario", "#88A48F", "#ABCFB4")
# no stress session and imagine prompt
plot_top_words(nsitextfreq, "Most Commonly Used Words In No Stress and Imagine Scenario", "#AB9BB5", "#CDBBD9")
# Visualize frequently used words on a wordcloud ------------------------
# One helper draws all four clouds. random.order = FALSE tells R to put the
# most frequent words in the middle of the wordcloud and move outward by
# frequency; the three colors break the words into tiers by how much they are
# used in the text data.
make_wordcloud <- function(tokens) {
  tokens %>%
    count(word, sort = TRUE) %>%
    with(wordcloud(word, n, max.words = 75, random.order = FALSE,
                   col = c("cadetblue3", "goldenrod2", "coral3")))
}
# stress session and describe prompt
sdwordcloud <- make_wordcloud(sdtoken)
# stress session and imagine prompt
siwordcloud <- make_wordcloud(sitoken)
# no stress session and describe prompt
nsdwordcloud <- make_wordcloud(nsdtoken)
# no stress session and imagine prompt
nsiwordcloud <- make_wordcloud(nsitoken)
# Compare word use across 4 scenarios ----
Now, let’s start to look into sentiments in the words used. To do this, we will explore the data by using different sentiment lexicons.
To simply see the lexicons, use get_sentiments("lexiconname"). For example, to view the entire nrc lexicon, do get_sentiments("nrc")
To only look at certain types of sentiments, use the filter pipe. For example, to only view fear sentiments within the nrc lexicon, do get_sentiments("nrc") %>% filter(sentiment == "fear")
This dictionary will provide either positive or negative sentiments. So, we will sort the data in all 4 datasets and then visualize the use of positive and negative words.
We will sort our text data by either positive or negative sentiments and create a new variable that shows the overall sentiment. A positive value here indicates an overall positive sentiment, while a negative value here indicates an overall negative sentiment.
bing <- get_sentiments("bing") # call the bing lexicon: 6,786 words, each tagged "positive" or "negative"
glimpse(bing) # look at the lexicon and how it categorizes words
# stress session and describe prompt ------------------------------------
sdbing1 <- sdtoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>% # keep only lexicon-scored words; explicit key silences the join message
  count(sentiment) %>% # count the number of positive and negative words
  spread(sentiment, n, fill = 0) %>% # make data wide instead of narrow; fill = 0 matches the other three scenarios and guards against a sentiment level with zero words
  mutate(overallsentiment = positive - negative) # positive count minus negative count gives an overall positive or negative sentiment
sdbing <- data.frame(t(sdbing1)) # flip dataframe to make more graph-able
colnames(sdbing) <- c("n") # rename column
# visualize sentiments on a bar plot
# NOTE(review): the hand-written labels below rely on t(sdbing1) producing rows
# in the order negative, positive, overallsentiment -- true for this pipeline,
# but fragile if the pipeline above changes.
sdbing %>% # graph the sentiment groups
  ggplot(aes(reorder(c("Negative", "Positive", "Overall Sentiment"), -n), n)) +
  geom_bar(stat = "identity", color = "#9DB6BC", fill = "#B2D1D8") +
  ggtitle("Positive and Negative Words In Stress and Describe Scenario (bing lexicon)") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment") +
  ylim(0, 125) +
  theme(plot.title = element_text(hjust = 0.5))
# visualize sentiments on a word cloud
sdbingwc <- sdtoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  reshape2::acast(word ~ sentiment, value.var = "n", fill = 0) %>% # word-by-sentiment count matrix, as comparison.cloud expects
  comparison.cloud(colors = c("#993434", "#6BAB7B"), max.words = 75) # negative words in red, positive in green
# stress session and imagine prompt ------------------------------------
sibing1 <- sitoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>% # explicit key silences the join message
  count(sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(overallsentiment = positive - negative)
sibing <- data.frame(t(sibing1))
colnames(sibing) <- c("n")
# visualize sentiments on a bar plot
sibing %>%
  ggplot(aes(reorder(c("Negative", "Positive", "Overall Sentiment"), -n), n)) +
  geom_bar(stat = "identity", color = "#8895B4", fill = "#A3B3D8") +
  ggtitle("Positive and Negative Words In Stress and Imagine Scenario (bing lexicon)") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment") +
  ylim(0, 125) +
  theme(plot.title = element_text(hjust = 0.5))
# visualize sentiments on a word cloud
# NOTE: on the recorded run, comparison.cloud() warned that about twenty words
# (frozen, illusions, mystery, rollercoaster, slowly, smash, smudged, snag,
# sticky, stolen, straining, stupid, suffer, tank, tired, trashy, trauma, wild,
# worn, wrong) could not be fit on the page and were not plotted; use a larger
# graphics device or a smaller scale if every word must appear.
sibingwc <- sitoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  reshape2::acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("#993434", "#6BAB7B"), max.words = 75)
# no stress session and describe prompt ------------------------------------
nsdbing1 <- nsdtoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>% # explicit key silences the join message
  count(sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(overallsentiment = positive - negative)
nsdbing <- data.frame(t(nsdbing1))
colnames(nsdbing) <- c("n")
# visualize sentiments on a bar plot
nsdbing %>%
  ggplot(aes(reorder(c("Negative", "Positive", "Overall Sentiment"), -n), n)) +
  geom_bar(stat = "identity", color = "#88A48F", fill = "#ABCFB4") +
  ggtitle("Positive and Negative Words In No Stress and Describe Scenario (bing lexicon)") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment") +
  ylim(0, 125) +
  theme(plot.title = element_text(hjust = 0.5))
# visualize sentiments on a word cloud
nsdbingwc <- nsdtoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  reshape2::acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("#993434", "#6BAB7B"), max.words = 75)
# no stress session and imagine prompt ------------------------------------
nsibing1 <- nsitoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>% # explicit key silences the join message
  count(sentiment) %>%
  spread(sentiment, n, fill = 0) %>%
  mutate(overallsentiment = positive - negative)
nsibing <- data.frame(t(nsibing1))
colnames(nsibing) <- c("n")
# visualize sentiments on a bar plot
nsibing %>%
  ggplot(aes(reorder(c("Negative", "Positive", "Overall Sentiment"), -n), n)) +
  geom_bar(stat = "identity", color = "#AB9BB5", fill = "#CDBBD9") +
  ggtitle("Positive and Negative Words In No Stress and Imagine Scenario (bing lexicon)") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment") +
  ylim(0, 125) +
  theme(plot.title = element_text(hjust = 0.5))
# visualize sentiments on a word cloud
nsibingwc <- nsitoken %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  reshape2::acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("#993434", "#6BAB7B"), max.words = 75)
# ***********not sure how to reorder bars where overall sentiment is always on one side/make it a different color?
# all 4 scenarios have an overall positive sentiment
Now let’s look into the nrc lexicon to see what specific emotions might arise from this text data.
This dictionary will provide numerous sentiments, including specific emotions like fear. Analysis will include assessment of which words are most commonly used, their sentiment category, their text response information, the number of times used, and the proportion in which they were used.
nrc <- get_sentiments("nrc") # obtain nrc lexicon: 13,901 word-sentiment pairs (one word can carry several sentiments)
glimpse(nrc) # view the sentiment types within the nrc lexicon
# stress session and describe prompt ------------------------------------
sdnrc <- nrc %>%
  inner_join(sdtoken, by = "word") %>% # explicit key silences the join message
  count(sentiment, sort = TRUE) # one row per nrc sentiment category, most frequent first
sdnrc
## recorded run: positive 334, trust 207, joy 196, anticipation 192,
## surprise 101, negative 100, anger 72, sadness 54, fear 41, disgust 20
# visualize sentiments on a bar plot -----
sdnrc %>%
  ggplot(aes(reorder(sentiment, -n), n)) +
  geom_bar(stat = "identity", color = "#9DB6BC", fill = "#B2D1D8") +
  ggtitle("NRC Sentiments In Stress and Describe Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Type") +
  ylim(0, 500) +
  theme(plot.title = element_text(hjust = 0.5))
# stress session and imagine prompt ------------------------------------
sinrc <- nrc %>%
  inner_join(sitoken, by = "word") %>% # explicit key silences the join message
  count(sentiment, sort = TRUE)
sinrc
## recorded run: positive 359, joy 199, anticipation 192, trust 181,
## negative 110, surprise 109, anger 75, sadness 43, fear 32, disgust 31
# visualize sentiments on a bar plot
sinrc %>%
  ggplot(aes(reorder(sentiment, -n), n)) +
  geom_bar(stat = "identity", color = "#8895B4", fill = "#A3B3D8") +
  ggtitle("NRC Sentiments In Stress and Imagine Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Type") +
  ylim(0, 500) +
  theme(plot.title = element_text(hjust = 0.5))
# no stress session and describe prompt ------------------------------------
nsdnrc <- nrc %>%
  inner_join(nsdtoken, by = "word") %>% # explicit key silences the join message
  count(sentiment, sort = TRUE)
nsdnrc
## recorded run: positive 348, joy 240, anticipation 231, trust 218,
## negative 131, surprise 123, anger 110, sadness 61, fear 49, disgust 24
# visualize sentiments on a bar plot
nsdnrc %>%
  ggplot(aes(reorder(sentiment, -n), n)) +
  geom_bar(stat = "identity", color = "#88A48F", fill = "#ABCFB4") +
  ggtitle("NRC Sentiments In No Stress and Describe Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Type") +
  ylim(0, 500) +
  theme(plot.title = element_text(hjust = 0.5))
# no stress session and imagine prompt ------------------------------------
nsinrc <- nrc %>%
  inner_join(nsitoken, by = "word") %>% # explicit key silences the join message
  count(sentiment, sort = TRUE)
nsinrc
## recorded run: positive 425, joy 276, anticipation 266, trust 238,
## surprise 132, negative 111, anger 106, fear 48, sadness 45, disgust 22
# visualize sentiments on a bar plot
nsinrc %>%
  ggplot(aes(reorder(sentiment, -n), n)) +
  geom_bar(stat = "identity", color = "#AB9BB5", fill = "#CDBBD9") +
  ggtitle("NRC Sentiments In No Stress and Imagine Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Type") +
  ylim(0, 500) +
  theme(plot.title = element_text(hjust = 0.5))
# Analyze _____ sentiment ------------------------------------------
# now let's look into specific sentiments and the words used in them
# NOTE(review): the draft below would error if uncommented as written --
# sdtoken has no `n` column, so arrange(desc(n)) needs a counting step first,
# e.g. replace the arrange line with: count(word, sort = TRUE)
#sdfear <- nrc %>%
# filter(sentiment == "fear") %>%
# inner_join(sdtoken, by = "word") %>%
# arrange(desc(n))
#sdfear
https://bookdown.org/Maxine/tidy-text-mining/the-sentiments-dataset.html This dictionary assigns words with a score between -5 and 5, negative scores indicating a negative sentiment, and positive scores indicating a positive sentiment. It will be useful in detecting how positive or negative these words are.
afinn <- get_sentiments("afinn") # obtain afinn lexicon: each word scored from -5 (most negative) to +5 (most positive)
glimpse(afinn) # view the sentiment values within the afinn lexicon
# stress session and describe prompt ------------------------------------
# create data to show each word and its afinn sentiment value
sdafinn <- sdtoken %>%
  inner_join(afinn, by = "word") # explicit key silences the join message
# summarize this data by counting how many of each sentiment value there is in the data... let's plot this!
sdafinncount <- sdafinn %>%
  count(value, sort = TRUE)
# visualize sentiment values on a bar plot -----
# BUG FIX: the original added xlim(-5, 5) and then a second x scale for the
# breaks; ggplot replaced the first scale with the second, silently dropping
# the limits (it warned "Scale for 'x' is already present"). Putting breaks
# and limits in ONE scale keeps both and removes the warning.
sdafinncount %>%
  ggplot(aes(x = value, y = n)) +
  geom_bar(stat = "identity", color = "#9DB6BC", fill = "#B2D1D8") +
  ggtitle("AFINN Sentiment Values In Stress and Describe Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Value") +
  ylim(0, 50) +
  scale_x_continuous(breaks = -5:5, limits = c(-5, 5)) +
  theme(plot.title = element_text(hjust = 0.5))
# stress session and imagine prompt ------------------------------------
# create data to show each word and its afinn sentiment value
siafinn <- sitoken %>%
  inner_join(afinn, by = "word") # explicit key silences the join message
# summarize this data by counting how many of each sentiment value there is in the data... let's plot this!
siafinncount <- siafinn %>%
  count(value, sort = TRUE)
# visualize sentiment values on a bar plot
# BUG FIX: breaks and limits combined into one x scale (the separate xlim()
# was silently replaced by the breaks-only scale in the original).
siafinncount %>%
  ggplot(aes(x = value, y = n)) +
  geom_bar(stat = "identity", color = "#8895B4", fill = "#A3B3D8") +
  ggtitle("AFINN Sentiment Values In Stress and Imagine Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Value") +
  ylim(0, 50) +
  scale_x_continuous(breaks = -5:5, limits = c(-5, 5)) +
  theme(plot.title = element_text(hjust = 0.5))
# no stress session and describe prompt ------------------------------------
# create data to show each word and its afinn sentiment value
nsdafinn <- nsdtoken %>%
  inner_join(afinn, by = "word") # explicit key silences the join message
# summarize this data by counting how many of each sentiment value there is in the data... let's plot this!
nsdafinncount <- nsdafinn %>%
  count(value, sort = TRUE)
# visualize sentiment values on a bar plot
# BUG FIX: breaks and limits combined into one x scale (the separate xlim()
# was silently replaced by the breaks-only scale in the original).
nsdafinncount %>%
  ggplot(aes(x = value, y = n)) +
  geom_bar(stat = "identity", color = "#88A48F", fill = "#ABCFB4") +
  ggtitle("AFINN Sentiment Values In No Stress and Describe Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Value") +
  ylim(0, 50) +
  scale_x_continuous(breaks = -5:5, limits = c(-5, 5)) +
  theme(plot.title = element_text(hjust = 0.5))
# no stress session and imagine prompt ------------------------------------
# create data to show each word and its afinn sentiment value
nsiafinn <- nsitoken %>%
  inner_join(afinn, by = "word") # explicit key silences the join message
# summarize this data by counting how many of each sentiment value there is in the data... let's plot this!
nsiafinncount <- nsiafinn %>%
  count(value, sort = TRUE)
# visualize sentiment values on a bar plot
# BUG FIX: breaks and limits combined into one x scale (the separate xlim()
# was silently replaced by the breaks-only scale in the original).
nsiafinncount %>%
  ggplot(aes(x = value, y = n)) +
  geom_bar(stat = "identity", color = "#AB9BB5", fill = "#CDBBD9") +
  ggtitle("AFINN Sentiment Values In No Stress and Imagine Scenario") +
  ylab("Frequency of Sentiment Occurrence") +
  xlab("Sentiment Value") +
  ylim(0, 50) +
  scale_x_continuous(breaks = -5:5, limits = c(-5, 5)) +
  theme(plot.title = element_text(hjust = 0.5))
# similar trends in the imagine scenarios ... very similar value amounts
# stress/describe seems to be the main one that differs in trends
https://bookdown.org/Maxine/tidy-text-mining/the-sentiments-dataset.html This dictionary was developed to assess financial sentiments and avoids words like "share", "fool", "liability", and "risk" due to their altered meanings in financial settings. It divides words into "constraining, litigious, negative, positive, superfluous, and uncertainty."
What each name given is for/stands for: